import numpy as np
import os
import shutil
from pathlib import Path
import xml.etree.ElementTree as ET
from glob import glob
import xmltodict
from PIL import Image, ImageDraw
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from PIL import Image
import yaml
# Locate annotation/image roots and report how many of each were found.
print(os.getcwd())
root_annots_path = '../data/annotations/'
root_images_path = '../data/images/'
# sorted() accepts the generator directly -- the intermediate list is unnecessary
annots_path = sorted(Path(root_annots_path).glob('*.xml'))
images_path = sorted(Path(root_images_path).glob('*.png'))
n_imgs, n_ann = len(images_path), len(annots_path)
print("Number of images and annotations {} {}".format(n_imgs, n_ann))
/home/temir/Documents/Kaggle/Siemense/yolov5 Number of images and annotations 3686 3686
# helper functions
def read_object_bbox(dict_object):
    """Extract the class name and pixel-space corner coordinates from a
    parsed Pascal-VOC ``<object>`` dict (as produced by xmltodict).

    Returns (class_name, xmin, ymin, xmax, ymax) with coordinates as ints.
    """
    corners = dict_object['bndbox']
    xmin, ymin, xmax, ymax = (
        int(corners[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    return dict_object['name'], xmin, ymin, xmax, ymax
def read_image_data(dict_size):
    """Return (height, width, channels) as ints from a parsed Pascal-VOC
    ``<size>`` dict (keys: 'height', 'width', 'depth')."""
    return (
        int(dict_size['height']),
        int(dict_size['width']),
        int(dict_size['depth']),
    )
# Convert every Pascal-VOC xml annotation into a YOLO-format label txt file
# and collect one summary row per image for later analysis.
text_labels_dir = '../data/labels_yolo/'
os.makedirs(text_labels_dir, exist_ok=True)
annotation_list = []
for label_path in annots_path:
    with open(label_path) as file:
        file_data = file.read()  # raw xml text
    # Parse the xml into nested dicts. NOTE(review): this assumes exactly one
    # <object> per file -- xmltodict returns a list for multiple objects,
    # which would break read_object_bbox. Holds for this dataset; confirm for new data.
    dict_data = xmltodict.parse(file_data)['annotation']
    filename = dict_data['filename']
    img_height, img_width, channels = read_image_data(dict_data['size'])
    class_name, xmin, ymin, xmax, ymax = read_object_bbox(dict_data['object'])
    # YOLO format: box center and size, normalized to [0, 1] by the image size
    x = ((xmin + xmax) / 2) / img_width
    y = ((ymin + ymax) / 2) / img_height
    width = (xmax - xmin) / img_width
    height = (ymax - ymin) / img_height
    class_label = 0 if class_name == 'cat' else 1
    # All the information we keep about this annotation
    object_ann_list = [filename, img_height, img_width, channels, class_name,
                       xmin, ymin, xmax, ymax, class_label, x, y, width, height]
    # Save the YOLO label: "<class> <x_center> <y_center> <width> <height>".
    # Fixes: join directory and file name as two arguments (the original
    # concatenated them and gave os.path.join a single string), and swap only
    # the extension via splitext (replace("png", "txt") would also corrupt a
    # stem that merely contains "png").
    save_file_name = os.path.join(text_labels_dir,
                                  os.path.splitext(filename)[0] + ".txt")
    yolo_fields = [class_label, x, y, width, height]
    with open(save_file_name, 'w') as f:
        f.write(" ".join(str(v) for v in yolo_fields))
    annotation_list.append(object_ann_list)
# Build a dataframe of all annotations so the dataset can be analyzed, and
# pickle it so later sessions can skip the xml parsing step.
annotation_df = pd.DataFrame(annotation_list,
columns=["filename", "img_height", "img_width", "channels", "class_name","xmin", "ymin", "xmax", "ymax", "class_label" ,"x", "y", "width", "height",])
annotation_df.to_pickle("../data/annotation_df.pkl")
# Sanity check: does any bbox extend below the bottom edge (ymax > img_height)?
# Expect an empty result.
annotation_df[annotation_df.img_height<annotation_df.ymax]
| filename | img_height | img_width | channels | class_name | ... | class_label | x | y | width | height |
|---|
0 rows × 14 columns
# Sanity check: does any bbox extend past the right edge (xmax > img_width)?
# Expect an empty result.
annotation_df[annotation_df.img_width<annotation_df.xmax]
| filename | img_height | img_width | channels | class_name | ... | class_label | x | y | width | height |
|---|
0 rows × 14 columns
# Quick look at the assembled annotation table
annotation_df.head()
| filename | img_height | img_width | channels | class_name | class_label | x | y | width | height | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Cats_Test0.png | 350 | 233 | 3 | cat | 0 | 0.600858 | 0.244286 | 0.489270 | 0.322857 |
| 1 | Cats_Test1.png | 500 | 500 | 3 | dog | 1 | 0.368000 | 0.244000 | 0.224000 | 0.400000 |
| 2 | Cats_Test10.png | 375 | 500 | 3 | dog | 1 | 0.421000 | 0.229333 | 0.214000 | 0.160000 |
| 3 | Cats_Test100.png | 375 | 500 | 3 | cat | 0 | 0.348000 | 0.472000 | 0.128000 | 0.170667 |
| 4 | Cats_Test1000.png | 500 | 335 | 3 | cat | 0 | 0.485075 | 0.326000 | 0.755224 | 0.596000 |
# Class balance: histogram and exact counts of the cat/dog labels
annotation_df.class_label.hist()
annotation_df.class_label.value_counts()
# Classes are imbalanced: there are roughly half as many cats as dogs.
# A random split keeps the same cat/dog proportion in train and test sets, so this should not be a problem.
1 2498 0 1188 Name: class_label, dtype: int64
annotation_df['area'] = annotation_df['img_width']*annotation_df['img_height']
annotation_df['area'].plot.hist()
# No anomalies visible in the image-area distribution.
<AxesSubplot: ylabel='Frequency'>
annotation_df.describe()
# Verify the normalized x, y, width, height values all lie within [0, 1].
| img_height | img_width | channels | class_label | x | y | width | height | area | |
|---|---|---|---|---|---|---|---|---|---|
| count | 3686.000000 | 3686.000000 | 3686.0 | 3686.000000 | 3686.000000 | 3686.000000 | 3686.000000 | 3686.000000 | 3686.000000 |
| mean | 377.466630 | 424.349702 | 3.0 | 0.677699 | 0.497839 | 0.368137 | 0.411714 | 0.447086 | 160962.447640 |
| std | 89.300112 | 94.009541 | 0.0 | 0.467421 | 0.120496 | 0.106439 | 0.187359 | 0.182669 | 47057.674922 |
| min | 108.000000 | 114.000000 | 3.0 | 0.000000 | 0.121000 | 0.097619 | 0.054000 | 0.066066 | 15552.000000 |
| 25% | 333.000000 | 333.000000 | 3.0 | 0.000000 | 0.429707 | 0.291667 | 0.270000 | 0.302622 | 166000.000000 |
| 50% | 375.000000 | 500.000000 | 3.0 | 1.000000 | 0.499000 | 0.361500 | 0.380000 | 0.418667 | 167000.000000 |
| 75% | 500.000000 | 500.000000 | 3.0 | 1.000000 | 0.568000 | 0.438935 | 0.521935 | 0.564378 | 187500.000000 |
| max | 600.000000 | 600.000000 | 3.0 | 1.000000 | 0.908333 | 0.789000 | 0.997403 | 0.997333 | 360000.000000 |
def yolo_to_pascal_voc(bbox_data):
    """Convert a YOLO box to Pascal-VOC pixel corners.

    ``bbox_data`` is [x_center, y_center, w, h, image_w, image_h] with the
    first four values normalized to [0, 1]; returns [x1, y1, x2, y2] in pixels.
    """
    x_center, y_center, w, h, image_w, image_h = bbox_data
    box_w = w * image_w
    box_h = h * image_h
    # top-left corner = center minus half the box size
    left = x_center * image_w - box_w / 2
    top = y_center * image_h - box_h / 2
    return [left, top, left + box_w, top + box_h]
def plot_img(filename, bbox=None):
    """Draw the ground-truth box for *filename* in green, show the image,
    and return the box as [x1, y1, x2, y2] pixel coordinates.

    Fix: ``bbox`` was a required parameter that the body immediately
    overwrote (callers even passed an undefined variable for it). It now
    defaults to None and -- matching the original behavior exactly -- is
    ignored: the box is always looked up in ``annotation_df``.
    """
    # look up the normalized YOLO box plus image size for this file
    bbox_list = annotation_df.loc[annotation_df.filename == filename,
                                  ["x", "y", "width", "height", "img_width", "img_height"]].values[0].tolist()
    bbox = yolo_to_pascal_voc(bbox_list)
    sample_image = Image.open(root_images_path + filename)
    # draw on a copy so the original image object stays untouched
    x_image = sample_image.copy()
    img_bbox = ImageDraw.Draw(x_image)
    img_bbox.rectangle(bbox, outline="green", width=5)
    display(x_image)  # notebook-only display helper
    print(bbox)
    return bbox
# Show the ground-truth boxes for one cat and one dog sample.
# NOTE(review): `bbox` is never assigned at top level before this call -- it
# works only via leftover notebook state; the argument is ignored by plot_img.
filename = 'Cats_Test0.png'
bbox_cat = plot_img(filename, bbox)
[83.0, 29.0, 197.0, 142.0]
filename = 'Cats_Test900.png'
bbox_dog = plot_img(filename, bbox)
[127.0, 108.0, 270.0, 246.0]
# Load a pretrained YOLOv5s from torch hub just to inspect its inference on sample images
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
Using cache found in /home/temir/.cache/torch/hub/ultralytics_yolov5_master YOLOv5 🚀 2022-12-11 Python-3.10.8 torch-1.13.0 CPU Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
0%| | 0.00/14.1M [00:00<?, ?B/s]
Fusing layers... YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients Adding AutoShape...
# Restrict detections to the classes we care about: COCO ids 15 (cat) and 16 (dog)
model.classes = [15, 16]
filename1 = 'Cats_Test0.png'
filename2 = 'Cats_Test900.png'
im1 = Image.open(root_images_path + filename1)
im2 = Image.open(root_images_path + filename2)
# batched inference on both images at 320px
results = model([im1, im2], size=320)
results
YOLOv5 <class 'models.common.Detections'> instance image 1/2: 350x233 1 cat image 2/2: 333x500 1 dog Speed: 7.5ms pre-process, 24.4ms inference, 0.6ms NMS per image at shape (2, 3, 320, 320)
results.pandas().xyxy[0]  # im1 predictions as a pandas frame (xmin, ymin, xmax, ymax, confidence, class, name)
| xmin | ymin | xmax | ymax | confidence | class | name | |
|---|---|---|---|---|---|---|---|
| 0 | 28.748392 | 30.900591 | 210.591934 | 312.377045 | 0.647688 | 15 | cat |
def plot_img_pred(filename, pred_bbox):
    """Show *filename* with the ground-truth box in red and the predicted
    box *pred_bbox* ([x1, y1, x2, y2] pixels) in green."""
    # fetch the normalized YOLO ground truth plus image size, convert to corners
    gt_row = annotation_df.loc[annotation_df.filename == filename,
                               ["x", "y", "width", "height", "img_width", "img_height"]]
    true_bbox = yolo_to_pascal_voc(gt_row.values[0].tolist())
    # draw on a copy so the opened image stays pristine
    canvas = Image.open(root_images_path + filename).copy()
    painter = ImageDraw.Draw(canvas)
    painter.rectangle(true_bbox, outline="red", width=5)
    painter.rectangle(pred_bbox, outline="green", width=5)
    display(canvas)
pred_bbox_cat = results.pandas().xyxy[0].values[0][:4].tolist()  # im1 prediction: [xmin, ymin, xmax, ymax]
plot_img_pred(filename1, pred_bbox_cat)
pred_bbox_dog = results.pandas().xyxy[1].values[0][:4].tolist()  # im2 prediction: [xmin, ymin, xmax, ymax]
plot_img_pred(filename2, pred_bbox_dog)
# The dataset annotations mostly cover only the animals' faces,
# while the pretrained YOLOv5 detects the whole body.
# Using the pretrained yolov5s directly would therefore score poorly, as shown below.
def bb_intersection_over_union(boxA, boxB):
    """Return the IoU of two [x1, y1, x2, y2] boxes (0 when disjoint)."""
    # intersection rectangle: latest left/top edge to earliest right/bottom edge
    overlap_w = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0])
    overlap_h = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1])
    # negative extents mean no overlap; clamp each side at zero
    interArea = abs(max(overlap_w, 0) * max(overlap_h, 0))
    if interArea == 0:
        return 0
    # individual box areas
    areaA = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
    areaB = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))
    # inclusion-exclusion: union = A + B - intersection
    return interArea / float(areaA + areaB - interArea)
# Compare the pretrained model's boxes to the face-only ground truth
print("IOU cat:", bb_intersection_over_union(pred_bbox_cat, bbox_cat))
print("IOU dog:",bb_intersection_over_union(pred_bbox_dog, bbox_dog))
# As expected, the IoU is far too low -- fine-tuning on our labels is needed.
IOU cat: 0.22370667322597185 IOU dog: 0.33379401352430105
# Split the dataset: 80% train-pool / 20% test, then 90/10 train/valid
# within the pool. Fixed random_state keeps the split reproducible.
# NOTE(review): this rebinds images_path (earlier a list of Path objects)
# to a list of plain path strings.
images_path =glob("../data/images/*.png")
train_list_total, test_list = train_test_split(images_path, test_size = 0.2, random_state = 0)
train_list, valid_list = train_test_split(train_list_total, test_size = 0.1, random_state = 0)
def write_file(path, text_list):
    """Write *text_list* to ../data/<path>, one entry per line, with a
    trailing newline at the end of the file."""
    content = '\n'.join(text_list) + '\n'
    with open("../data/" + path, 'w') as f:
        f.write(content)
# Persist the image-path lists; YOLOv5 will read these txts via data.yaml
write_file('train.txt',train_list )
write_file('valid.txt',valid_list )
write_file('test.txt',test_list )
# Create the dataset yaml that YOLOv5 consumes: where the split txts live,
# how many classes there are, and the class names (index order = class id).
data = {
    'path': '../data/',
    'train': 'train.txt',
    'val': 'valid.txt',
    'test': 'test.txt',
    'nc': 2,
    'names': ['cat', 'dog'],
}
with open('../data/data.yaml', 'w') as f:
    yaml.dump(data, f)
os.getcwd()  # confirm the current working directory before %cd into yolov5
'/home/temir/Documents/Kaggle/Siemense/notebooks'
%cd ../yolov5
!python train.py \
--img 640 \
--batch 20 \
--epochs 10 \
--data ../data/data.yaml \
--cfg ./models/yolov5s.yaml \
--weights yolov5s.pt \
--name siemence_cat_dogs
/home/temir/Documents/Kaggle/Siemense/yolov5 train: weights=yolov5s.pt, cfg=./models/yolov5s.yaml, data=../data/data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=10, batch_size=20, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=siemence_cat_dogs, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest Command 'git fetch origin' timed out after 5 seconds YOLOv5 🚀 v7.0-30-g342fe05 Python-3.10.8 torch-1.13.0 CPU hyperparameters: lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0 ClearML: run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML Comet: run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet TensorBoard: Start with 'tensorboard --logdir runs/train', view at http://localhost:6006/ Overriding model.yaml nc=80 with nc=2 from n params module arguments 0 -1 1 3520 models.common.Conv [3, 32, 6, 2, 2] 1 -1 1 18560 models.common.Conv [32, 64, 3, 2] 2 -1 1 18816 models.common.C3 [64, 64, 1] 3 -1 1 73984 models.common.Conv [64, 128, 3, 2] 4 -1 2 115712 models.common.C3 [128, 128, 2] 5 -1 1 295424 models.common.Conv [128, 256, 3, 2] 6 -1 3 625152 models.common.C3 [256, 256, 3] 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2] 8 -1 1 1182720 models.common.C3 [512, 512, 1] 9 -1 1 656896 models.common.SPPF [512, 512, 
5] 10 -1 1 131584 models.common.Conv [512, 256, 1, 1] 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] 12 [-1, 6] 1 0 models.common.Concat [1] 13 -1 1 361984 models.common.C3 [512, 256, 1, False] 14 -1 1 33024 models.common.Conv [256, 128, 1, 1] 15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] 16 [-1, 4] 1 0 models.common.Concat [1] 17 -1 1 90880 models.common.C3 [256, 128, 1, False] 18 -1 1 147712 models.common.Conv [128, 128, 3, 2] 19 [-1, 14] 1 0 models.common.Concat [1] 20 -1 1 296448 models.common.C3 [256, 256, 1, False] 21 -1 1 590336 models.common.Conv [256, 256, 3, 2] 22 [-1, 10] 1 0 models.common.Concat [1] 23 -1 1 1182720 models.common.C3 [512, 512, 1, False] 24 [17, 20, 23] 1 18879 models.yolo.Detect [2, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]] YOLOv5s summary: 214 layers, 7025023 parameters, 7025023 gradients, 16.0 GFLOPs Transferred 342/349 items from yolov5s.pt optimizer: SGD(lr=0.01) with parameter groups 57 weight(decay=0.0), 60 weight(decay=0.00046875), 60 bias train: Scanning /home/temir/Documents/Kaggle/Siemense/data/train.cache... 2653 i val: Scanning /home/temir/Documents/Kaggle/Siemense/data/valid.cache... 295 imag AutoAnchor: 3.89 anchors/target, 1.000 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅ Plotting labels to runs/train/siemence_cat_dogs2/labels.jpg... Image sizes 640 train, 640 val Using 8 dataloader workers Logging results to runs/train/siemence_cat_dogs2 Starting training for 10 epochs... 
Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 0/9 0G 0.06982 0.02522 0.01992 23 640: 1 Class Images Instances P R mAP50 all 295 295 0.653 0.569 0.624 0.298 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 1/9 0G 0.04736 0.0168 0.008399 27 640: 1 Class Images Instances P R mAP50 all 295 295 0.639 0.738 0.727 0.39 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 2/9 0G 0.04178 0.01489 0.007198 27 640: 1 Class Images Instances P R mAP50 all 295 295 0.665 0.833 0.798 0.481 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 3/9 0G 0.03702 0.01393 0.007066 29 640: 1 Class Images Instances P R mAP50 all 295 295 0.822 0.835 0.876 0.565 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 4/9 0G 0.03404 0.01332 0.006941 28 640: 1 Class Images Instances P R mAP50 all 295 295 0.903 0.883 0.904 0.607 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 5/9 0G 0.03111 0.01319 0.006063 26 640: 1 Class Images Instances P R mAP50 all 295 295 0.943 0.942 0.973 0.692 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 6/9 0G 0.02845 0.01211 0.005054 30 640: 1 Class Images Instances P R mAP50 all 295 295 0.935 0.936 0.975 0.735 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 7/9 0G 0.02663 0.01164 0.004005 18 640: 1 Class Images Instances P R mAP50 all 295 295 0.958 0.967 0.986 0.771 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 8/9 0G 0.02394 0.01097 0.004021 29 640: 1 Class Images Instances P R mAP50 all 295 295 0.977 0.982 0.989 0.794 Epoch GPU_mem box_loss obj_loss cls_loss Instances Size 9/9 0G 0.02219 0.01057 0.002979 27 640: 1 Class Images Instances P R mAP50 all 295 295 0.974 0.982 0.991 0.813 10 epochs completed in 2.124 hours. Optimizer stripped from runs/train/siemence_cat_dogs2/weights/last.pt, 14.4MB Optimizer stripped from runs/train/siemence_cat_dogs2/weights/best.pt, 14.4MB Validating runs/train/siemence_cat_dogs2/weights/best.pt... Fusing layers... 
YOLOv5s summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs Class Images Instances P R mAP50 all 295 295 0.974 0.982 0.991 0.813 cat 295 99 0.985 0.99 0.994 0.86 dog 295 196 0.964 0.974 0.988 0.765 Results saved to runs/train/siemence_cat_dogs2
# import shutil
# from tqdm.notebook import tqdm
# test_list_filenames = [x.split("/")[-1] for x in test_list]
# os.makedirs("../data/test_images/", exist_ok=True)
# def copy_img(filename):
# source = "../data/images/" + filename
# destination = "../data/test_images/"+filename
# shutil.copy(source, destination)
# for filename in tqdm(test_list_filenames):
# copy_img(filename)
0%| | 0/738 [00:00<?, ?it/s]
# %cd /home/temir/Documents/Kaggle/Siemense/yolov5
!python val.py \
--data '../data/data.yaml' \
--weights "./runs/train/siemence_cat_dogs2/weights/best.pt" \
--task test \
--save-txt \
--max-det 1
val: data=../data/data.yaml, weights=['./runs/train/siemence_cat_dogs2/weights/best.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=1, task=test, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=True, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False YOLOv5 🚀 v7.0-30-g342fe05 Python-3.10.8 torch-1.13.0 CPU Fusing layers... YOLOv5s summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs test: Scanning /home/temir/Documents/Kaggle/Siemense/data/test.cache... 738 imag Class Images Instances P R mAP50 all 738 738 0.988 0.983 0.99 0.842 cat 738 237 0.996 0.987 0.993 0.883 dog 738 501 0.98 0.978 0.987 0.801 Speed: 0.7ms pre-process, 76.8ms inference, 0.4ms NMS per image at shape (32, 3, 640, 640) Results saved to runs/val/exp 738 labels saved to runs/val/exp/labels
# (reminder: the --imgsz flag above controls the validation image size)
prediction_path = "./runs/val/exp/labels/"
# Take the first ten saved prediction label files and map them back to image names
dir_list = os.listdir(prediction_path)
ten_img_list = [x[:-4]+".png" for x in dir_list[:10]]
ten_img_list
# annotation_df = pd.read_pickle("../data/annotation_df.pkl")
def plot_img_pred_test_set(filename):
    """Show *filename* with the ground-truth box (red) and the saved YOLO
    prediction (green), annotated with their IoU and the predicted class id."""
    # ground truth is stored directly as pixel corners in annotation_df
    gt_row = annotation_df.loc[annotation_df.filename == filename,
                               ["xmin", "ymin", "xmax", "ymax"]]
    true_bbox = gt_row.values[0].tolist()
    sample_image = Image.open(root_images_path + filename)
    img_width, img_height = sample_image.size
    # first line of the saved label: "<class> <x> <y> <w> <h>" (normalized)
    with open(prediction_path + filename[:-4] + ".txt") as f:
        first_line = f.readlines()[0]
    pred_list = [float(token) for token in first_line.split(" ")]
    # append the image size so yolo_to_pascal_voc can de-normalize
    pred_list.append(img_width)
    pred_list.append(img_height)
    pred_bbox = yolo_to_pascal_voc(pred_list[1:])
    canvas = sample_image.copy()
    painter = ImageDraw.Draw(canvas)
    painter.rectangle(true_bbox, outline="red", width=5)
    painter.rectangle(pred_bbox, outline="green", width=5)
    iou = bb_intersection_over_union(true_bbox, pred_bbox)
    painter.text((0, 0), f"IOU: {iou:.3f}\nCLASS: {pred_list[0]}", fill="yellow")
    display(canvas)
# Visualize prediction vs ground truth for the ten sampled test images
for x in ten_img_list: plot_img_pred_test_set(x)
%cd /home/temir/Documents/Kaggle/Siemense/yolov5
!python val.py \
--data '../data/data.yaml' \
--imgsz 256 \
--weights "./runs/train/siemence_cat_dogs2/weights/best.pt" \
--task test \
--save-txt \
--max-det 1
/home/temir/Documents/Kaggle/Siemense/yolov5 val: data=../data/data.yaml, weights=['./runs/train/siemence_cat_dogs2/weights/best.pt'], batch_size=32, imgsz=256, conf_thres=0.001, iou_thres=0.6, max_det=1, task=test, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=True, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False YOLOv5 🚀 v7.0-30-g342fe05 Python-3.10.8 torch-1.13.0 CPU Fusing layers... YOLOv5s summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs test: Scanning /home/temir/Documents/Kaggle/Siemense/data/test.cache... 738 imag Class Images Instances P R mAP50 all 738 738 0.967 0.957 0.975 0.773 cat 738 237 0.977 0.975 0.985 0.815 dog 738 501 0.957 0.939 0.966 0.732 Speed: 0.1ms pre-process, 14.3ms inference, 0.3ms NMS per image at shape (32, 3, 256, 256) Results saved to runs/val/exp3 738 labels saved to runs/val/exp3/labels